package com.boat.hbase.table.util;

import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.TreeSet;

import org.apache.hadoop.hbase.util.Bytes;

/**
 * @Description: Samples hash-generated rowkeys to compute evenly spaced HBase
 *               region split keys for pre-splitting a table.
 * @author boat
 * @date 2017-12-28 10:55:17
 * @version V1.0
 */

public class HashChoreWoker {
	// Number of sample rowkeys to generate (around 1 million; too many risks OOM).
	private int baseRecord;
	// Generator producing the sampled rowkeys.
	private HashRowKeyGenerator rkGen;
	// Nominal stride between chosen split keys: sample count / region count.
	private int splitKeysBase;
	// Number of split keys to produce (one fewer than the region count).
	private int splitKeysNumber;
	// Split keys derived from the sample, sorted ascending.
	private byte[][] splitKeys;

	/**
	 * Creates a worker that samples {@code baseRecord} generated rowkeys and
	 * derives {@code prepareRegions - 1} split keys from them.
	 *
	 * @param baseRecord
	 *            number of rowkeys to sample (around 1 million; much larger
	 *            values risk an OutOfMemoryError)
	 * @param prepareRegions
	 *            number of regions the table will be pre-split into
	 * @throws IllegalArgumentException
	 *             if {@code prepareRegions < 1} or
	 *             {@code baseRecord < prepareRegions}; without this check the
	 *             original code failed later with an opaque ArithmeticException
	 *             (division / modulo by zero)
	 */
	public HashChoreWoker(int baseRecord, int prepareRegions) {
		// Fail fast: prepareRegions == 0 would divide by zero below, and
		// baseRecord < prepareRegions would make splitKeysBase == 0, crashing
		// calcSplitKeys() at "pointer % splitKeysBase".
		if (prepareRegions < 1) {
			throw new IllegalArgumentException("prepareRegions must be >= 1, got " + prepareRegions);
		}
		if (baseRecord < prepareRegions) {
			throw new IllegalArgumentException("baseRecord (" + baseRecord
					+ ") must be >= prepareRegions (" + prepareRegions + ")");
		}
		this.baseRecord = baseRecord;
		// Instantiate the rowkey generator used for sampling.
		rkGen = new HashRowKeyGenerator();
		// One fewer split key than the number of regions.
		splitKeysNumber = prepareRegions - 1;
		splitKeysBase = baseRecord / prepareRegions;
	}

	/**
	 * Samples {@code baseRecord} rowkeys and picks evenly spaced ones as the
	 * region split keys.
	 *
	 * <p>The sample is held in a {@link TreeSet} ordered by
	 * {@code Bytes.BYTES_COMPARATOR}, which both sorts and de-duplicates it.
	 * Because of de-duplication the distinct sample may be smaller than
	 * {@code baseRecord}, so the stride is computed from the actual sample
	 * size; the original code strode by the nominal {@code splitKeysBase},
	 * which could leave trailing {@code splitKeys} entries {@code null}.
	 *
	 * @return array of {@code splitKeysNumber} split keys in ascending order
	 * @throws UnsupportedEncodingException
	 *             retained for backward compatibility with existing callers;
	 *             not actually thrown by this implementation
	 * @throws IllegalStateException
	 *             if the generator produced too few distinct keys to choose
	 *             the requested number of split keys
	 */
	public byte[][] calcSplitKeys() throws UnsupportedEncodingException {
		splitKeys = new byte[splitKeysNumber][];
		// TreeSet keeps the sample sorted and de-duplicated.
		TreeSet<byte[]> rows = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
		for (int i = 0; i < baseRecord; i++) {
			rows.add(rkGen.nextId());
		}
		// Derive the stride from the distinct-sample size, not baseRecord,
		// so every splitKeys slot is guaranteed to be filled.
		int sampleSize = rows.size();
		if (sampleSize <= splitKeysNumber) {
			throw new IllegalStateException("sample too small: " + sampleSize
					+ " distinct keys for " + splitKeysNumber + " split keys");
		}
		// sampleSize >= splitKeysNumber + 1 guarantees step >= 1.
		int step = sampleSize / (splitKeysNumber + 1);
		int pointer = 0;
		int index = 0;
		Iterator<byte[]> rowKeyIter = rows.iterator();
		while (rowKeyIter.hasNext() && index < splitKeysNumber) {
			byte[] tempRow = rowKeyIter.next();
			// Release entries as we go to keep memory pressure low during
			// iteration over a ~1M-element set.
			rowKeyIter.remove();
			if (pointer != 0 && pointer % step == 0) {
				splitKeys[index] = tempRow;
				index++;
			}
			pointer++;
		}
		rows.clear();
		rows = null;
		return splitKeys;
	}
}
